﻿/*
 * Copyright (c) 2016 John May <jwmay@users.sf.net>
 *
 * Contact: cdk-devel@lists.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version. All we ask is that proper credit is given
 * for our work, which includes - but is not limited to - adding the above
 * copyright notice to the beginning of your source code files, and to any
 * copyright notice that you may distribute with programs based on this work.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 U
 */

using NCDK.Common.Collections;
using System;
using System.Collections.Generic;
using System.Text;

namespace NCDK.SMARTS
{
    /// <summary>
    /// Modes of the extractor.
    /// </summary>
    public enum SubstructureSelectionMode
    {
        /// <summary>
        /// SMARTS similar to JCompoundMapper.
        /// </summary>
        JCompoundMapper = 1,

        /// <summary>
        /// exact SMARTS.
        /// </summary>
        ExactSmarts = 2,
    }

    /// <summary>
    /// Utility class to create SMARTS that match part (substructure) of a molecule.
    /// SMARTS are generated by providing the atom indexes. An example use cases is
    /// encoding features from a fingerprint.
    /// </summary>
    /// <remarks>
    /// <para>
    /// The extractor has two modes. <see cref="SubstructureSelectionMode.ExactSmarts"/> (default) captures the element,
    /// valence, hydrogen count, connectivity, and charge in the SMARTS atom expressions.
    /// The alternative mode, <see cref="SubstructureSelectionMode.JCompoundMapper"/>, only captures the element,
    /// non-zero charge, and peripheral bonds. Although the later looks cleaner, the
    /// peripheral bonds intend to capture the connectivity of the terminal atoms but
    /// since the valence is not bounded further substitution is still allowed. This
    /// mirrors functionality from jCompoundMapper <token>cdk-cite-Hinselmann2011</token>.
    /// </para>
    /// <para>
    /// The difference is easily demonstrated for methyl. Consider the compound
    /// of 2-methylpentane "CC(C)CCC", if we extract one of the methyl atoms
    /// depending on the mode we obtain "[CH3v4X4+0]" or "C*". The first
    /// of these patterns (obtained by <see cref="SubstructureSelectionMode.ExactSmarts"/>) matches the compound in
    /// <b>three places</b> (the three methyl groups). The second matches <b>six</b>
    /// times (every atom) because the substituion on the carbon is not locked.
    /// A further complication is introduced by the inclusion of the peripheral atoms,
    /// for 1H-indole "[nH]1ccc2c1cccc2" we can obtain the SMARTS "n(ccc(a)a)a"
    /// that doesn't match at all. This is because one of the aromatic atoms ('a')
    /// needs to match the nitrogen.
    /// </para>
    /// </remarks>
    /// <example>
    /// <include file='IncludeExamples.xml' path='Comments/Codes[@id="NCDK.SMARTS.SmartsFragmentExtractor_Example.cs"]/*' />
    /// </example>
    // @author Nikolay Kochev
    // @author Nina Jeliazkova
    // @author John May 
    public sealed class SmartsFragmentExtractor
    {
        // molecule being selected over
        private readonly IAtomContainer mol;

        // fast-access mol graph data structures
        private readonly int[][] atomAdj, bondAdj;
        private readonly int[] deg;

        // SMARTS atom and bond expressions
        private readonly string[] aexpr;
        private readonly string[] bexpr;

        // SMARTS traversal/generation
        private readonly int[] avisit;
        private readonly int[] rbnds;
        private readonly int[] rnums;
        private int numVisit;

        // which mode should SMARTS be encoded in
        private SubstructureSelectionMode mode = SubstructureSelectionMode.ExactSmarts;

        /// <summary>
        /// Create a new instance over the provided molecule.
        /// </summary>
        /// <param name="mol">molecule</param>
        public SmartsFragmentExtractor(IAtomContainer mol)
        {
            this.mol = mol;

            var numAtoms = mol.Atoms.Count;
            var numBonds = mol.Bonds.Count;

            // build fast access
            this.deg = new int[numAtoms];
            this.atomAdj = Arrays.CreateJagged<int>(numAtoms, 4);
            this.bondAdj = Arrays.CreateJagged<int>(numAtoms, 4);
            this.aexpr = new string[numAtoms];
            this.bexpr = new string[numBonds];
            this.avisit = new int[numAtoms];
            this.rbnds = new int[numBonds];
            this.rnums = new int[100]; // max 99 in SMILES/SMARTS

            // index adjacency information and bond expressions for quick
            // reference and traversal
            for (int bondIdx = 0; bondIdx < numBonds; bondIdx++)
            {
                var bond = mol.Bonds[bondIdx];
                var beg = bond.Begin;
                var end = bond.End;
                var begIdx = mol.Atoms.IndexOf(beg);
                var endIdx = mol.Atoms.IndexOf(end);
                this.bexpr[bondIdx] = EncodeBondExpr(bondIdx, begIdx, endIdx);

                // make sufficient space
                if (deg[begIdx] == atomAdj[begIdx].Length)
                {
                    atomAdj[begIdx] = Arrays.CopyOf(atomAdj[begIdx], deg[begIdx] + 2);
                    bondAdj[begIdx] = Arrays.CopyOf(bondAdj[begIdx], deg[begIdx] + 2);
                }
                if (deg[endIdx] == atomAdj[endIdx].Length)
                {
                    atomAdj[endIdx] = Arrays.CopyOf(atomAdj[endIdx], deg[endIdx] + 2);
                    bondAdj[endIdx] = Arrays.CopyOf(bondAdj[endIdx], deg[endIdx] + 2);
                }

                atomAdj[begIdx][deg[begIdx]] = endIdx;
                bondAdj[begIdx][deg[begIdx]] = bondIdx;
                atomAdj[endIdx][deg[endIdx]] = begIdx;
                bondAdj[endIdx][deg[endIdx]] = bondIdx;

                deg[begIdx]++;
                deg[endIdx]++;
            }

            // pre-generate atom expressions
            for (int atomIdx = 0; atomIdx < numAtoms; atomIdx++)
                this.aexpr[atomIdx] = EncodeAtomExpr(atomIdx);
        }

        /// <summary>
        /// Set the mode of SMARTS substructure selection
        /// </summary>
        /// <param name="mode">the mode</param>
        public void SetMode(SubstructureSelectionMode mode)
        {
            // check arg
            switch (mode)
            {
                case SubstructureSelectionMode.ExactSmarts:
                case SubstructureSelectionMode.JCompoundMapper:
                    break;
                default:
                    throw new ArgumentException("Invalid mode specified!");
            }
            this.mode = mode;

            // re-gen atom expressions
            int numAtoms = mol.Atoms.Count;
            for (int atomIdx = 0; atomIdx < numAtoms; atomIdx++)
                this.aexpr[atomIdx] = EncodeAtomExpr(atomIdx);
        }

        /// <summary>
        /// Generate a SMARTS for the substructure formed of the provided
        /// atoms.
        /// </summary>
        /// <param name="atomIdxs">atom indexes</param>
        /// <returns>SMARTS, null if an empty array is passed</returns>
        public string Generate(IReadOnlyList<int> atomIdxs)
        {
            if (atomIdxs == null)
                throw new ArgumentNullException(nameof(atomIdxs), "No atom indexes provided");
            if (atomIdxs.Count == 0)
                return null; // makes sense?

            // special case
            if (atomIdxs.Count == 1 && mode == SubstructureSelectionMode.ExactSmarts)
                return aexpr[atomIdxs[0]];

            // initialize traversal information
            Arrays.Fill(rbnds, 0);
            Arrays.Fill(avisit, 0);
            foreach (int atmIdx in atomIdxs)
                avisit[atmIdx] = -1;

            // first visit marks ring information
            numVisit = 1;
            foreach (int atomIdx in atomIdxs)
            {
                if (avisit[atomIdx] < 0)
                    MarkRings(atomIdx, -1);
            }

            // reset visit flags and generate
            numVisit = 1;
            foreach (int atmIdx in atomIdxs)
                avisit[atmIdx] = -1;

            // second pass builds the expression
            var sb = new StringBuilder();
            for (int i = 0; i < atomIdxs.Count; i++)
            {
                if (avisit[atomIdxs[i]] < 0)
                {
                    if (i > 0)
                        sb.Append('.');
                    EncodeExpr(atomIdxs[i], -1, sb);
                }
            }

            return sb.ToString();
        }

        /// <summary>
        /// Recursively marks ring closures (back edges) in the <see cref="rbnds"/>
        /// array in a depth first order.
        /// </summary>
        /// <param name="idx">atom index</param>
        /// <param name="bprev">previous bond</param>
        private void MarkRings(int idx, int bprev)
        {
            avisit[idx] = numVisit++;
            var d = deg[idx];
            for (int j = 0; j < d; j++)
            {
                int nbr = atomAdj[idx][j];
                int bidx = bondAdj[idx][j];
                if (avisit[nbr] == 0 || bidx == bprev)
                    continue; // ignored
                else if (avisit[nbr] < 0)
                    MarkRings(nbr, bidx);
                else if (avisit[nbr] < avisit[idx])
                    rbnds[bidx] = -1; // ring closure
            }
        }

        /// <summary>
        /// Recursively encodes a SMARTS expression into the provides
        /// string builder.
        /// </summary>
        /// <param name="idx">atom index</param>
        /// <param name="bprev">previous bond</param>
        /// <param name="sb">destition to write SMARTS to</param>
        private void EncodeExpr(int idx, int bprev, StringBuilder sb)
        {
            avisit[idx] = numVisit++;
            sb.Append(aexpr[idx]);
            var d = deg[idx];

            var remain = d;
            for (int j = 0; j < d; j++)
            {
                var nbr = atomAdj[idx][j];
                var bidx = bondAdj[idx][j];

                // ring open/close
                if (rbnds[bidx] < 0)
                {
                    // open
                    var rnum = ChooseRingNumber();
                    if (rnum > 9)
                        sb.Append('%');
                    sb.Append(rnum);
                    rbnds[bidx] = rnum;
                }
                else if (rbnds[bidx] > 0)
                {
                    // close
                    var rnum = rbnds[bidx];
                    ReleaseRingNumber(rnum);
                    if (rnum > 9)
                        sb.Append('%');
                    sb.Append(rnum);
                }

                if (mode == SubstructureSelectionMode.ExactSmarts && avisit[nbr] == 0 
                 || bidx == bprev 
                 || rbnds[bidx] != 0)
                    remain--;
            }

            for (int j = 0; j < d; j++)
            {
                var nbr = atomAdj[idx][j];
                var bidx = bondAdj[idx][j];
                if (mode == SubstructureSelectionMode.ExactSmarts && avisit[nbr] == 0
                 || bidx == bprev 
                 || rbnds[bidx] != 0)
                    continue; // ignored
                remain--;
                if (avisit[nbr] == 0)
                {
                    // peripheral bond
                    if (remain > 0)
                        sb.Append('(');
                    sb.Append(bexpr[bidx]);
                    sb.Append(mol.Atoms[nbr].IsAromatic ? 'a' : '*');
                    if (remain > 0)
                        sb.Append(')');
                }
                else
                {
                    if (remain > 0)
                        sb.Append('(');
                    sb.Append(bexpr[bidx]);
                    EncodeExpr(nbr, bidx, sb);
                    if (remain > 0)
                        sb.Append(')');
                }
            }
        }

        /// <summary>
        /// Select the lowest ring number for use in SMARTS.
        /// </summary>
        /// <returns>ring number</returns>
        /// <exception cref="InvalidOperationException">all ring numbers are used</exception>
        private int ChooseRingNumber()
        {
            for (int i = 1; i < rnums.Length; i++)
            {
                if (rnums[i] == 0)
                {
                    rnums[i] = 1;
                    return i;
                }
            }
            throw new InvalidOperationException("No more ring numbers available!");
        }

        /// <summary>
        /// Releases a ring number allowing it to be reused.
        /// </summary>
        /// <param name="rnum">ring number</param>
        private void ReleaseRingNumber(int rnum)
        {
            rnums[rnum] = 0;
        }

        /// <summary>
        /// Encodes the atom at index (atmIdx) to a SMARTS
        /// expression that matches itself.
        /// </summary>
        /// <param name="atmIdx">atom index</param>
        /// <returns>SMARTS atom expression</returns>
        private string EncodeAtomExpr(int atmIdx)
        {
            var atom = mol.Atoms[atmIdx];
            var complex = mode == SubstructureSelectionMode.ExactSmarts;
            var sb = new StringBuilder();

            switch (atom.AtomicNumber)
            {
                case 0:  // *
                    sb.Append('*');
                    break;
                case 5:  // B
                case 6:  // C
                case 7:  // N
                case 8:  // O
                case 15: // P
                case 16: // S
                case 9:  // F
                case 17: // Cl
                case 35: // Br
                case 53: // I
                    sb.Append(atom.IsAromatic ? atom.Symbol.ToLowerInvariant() : atom.Symbol);
                    break;
                default:
                    complex = true;
                    sb.Append(atom.IsAromatic ? atom.Symbol.ToLowerInvariant() : atom.Symbol);
                    break;
            }

            if (mode == SubstructureSelectionMode.ExactSmarts)
            {
                var hcount = atom.ImplicitHydrogenCount.Value;
                var valence = hcount;
                var connections = hcount;

                var atmDeg = this.deg[atmIdx];
                for (int i = 0; i < atmDeg; i++)
                {
                    var bond = mol.Bonds[bondAdj[atmIdx][i]];
                    var nbr = bond.GetOther(atom);
                    if (nbr.AtomicNumber == 1)
                        hcount++;
                    var bord = bond.Order.IsUnset() ? 0 : bond.Order.Numeric();
                    if (bord == 0)
                        throw new ArgumentException("Molecule had unsupported zero-order or unset bonds!");
                    valence += bord;
                    connections++;
                }

                sb.Append('H').Append(hcount);
                sb.Append('v').Append(valence);
                sb.Append('X').Append(connections);
            }

            int chg = atom.FormalCharge ?? 0;

            if (chg <= -1 || chg >= +1)
            {
                if (chg >= 0)
                    sb.Append('+');
                else
                    sb.Append('-');
                var abs = Math.Abs(chg);
                if (abs > 1)
                    sb.Append(abs);
                complex = true;
            }
            else if (mode == SubstructureSelectionMode.ExactSmarts)
            {
                sb.Append("+0");
            }

            return complex ? $"[{sb.ToString()}]" : sb.ToString();
        }

        /// <summary>
        /// Encodes the bond at index (bondIdx) to a SMARTS
        /// expression that matches itself.
        /// </summary>
        /// <param name="bondIdx">bond index</param>
        /// <param name="beg">atom index of first atom</param>
        /// <param name="end">atom index of second atom</param>
        /// <returns>SMARTS bond expression</returns>
        private string EncodeBondExpr(int bondIdx, int beg, int end)
        {
            var bond = mol.Bonds[bondIdx];
            if (bond.Order == BondOrder.Unset)
                return "";

            var bArom = bond.IsAromatic;
            var aArom = mol.Atoms[beg].IsAromatic && mol.Atoms[end].IsAromatic;
            switch (bond.Order)
            {
                case BondOrder.Single:
                    if (bArom)
                    {
                        return aArom ? "" : ":";
                    }
                    else
                    {
                        return aArom ? "-" : "";
                    }
                case BondOrder.Double:
                    return bArom ? "" : "=";
                case BondOrder.Triple:
                    return "#";
                default:
                    throw new ArgumentException("Unsupported bond type: " + bond.Order);
            }
        }
    }
}
